Celeb-A의 20만2,599개의 유명 인사 얼굴 사진을 사용하는 대신에

1만6,000개의 훈련 샘플만 사용하여 훈련과정의 속도와 난이도를 조절

데이터 증식(data augmentation)

일반화 성능을 높이고 작은 데이터셋에서의 과대적합을 막기 위한 기법이다.

데이터 증식은 부족한 훈력 데이터를 해결하기 위해 적용할 수 있는 다양한 기술의 집합이다.

데이터 증식 기법은 데이터를 수정하거나, 심지어 더 많은 데이터를 인공적으로 합성할 수도 있다.

과대적합을 줄이고, 머신 러닝과 딥러닝 모델의 성능을 높일 수 있다.

이미지 데이터에 대하여서 이미지를 뒤집거나 대비, 명도, 채도를 바꿀 수 있다.(tf.image)

데이터 로드

import tensorflow as tf

import tensorflow_datasets as tfds

#celeba_bldr=tfds.builder('celeb_a')

#celeba_bldr.download_and_prepare()

#celeba=celeba_bldr.as_dataset(shuffle_files=False)

#tfds celeb_a 데이터 다운로드 불가 데이터셋 수동으로 만들어야 함

celeba_train=celeba['train']

celeba_valid=celeba['validation']

celeba_test=celeba['test']

def count_items(ds):

n=0

for _ in ds:

n+=1

return n

celeba_train=celeba_train.take(16000)

celeba_valid=celeba_valid.take(1000)

위의 코드에서 tfds.builder가 정상 작동하지 않음

수동으로 데이터를 받아서 수동으로 데이터셋 생성, Celeb_A dataset(self) 참조

이미지 변환 & 데이터 증식

celeb_train의 5개의 이미지에 각기 다른 변환을 적용

1. 이미지를 바운딩 박스로 잘라내기

2. 이미지를 수평으로 뒤집기

3. 대비(contrast) 조정하기

4. 명도(brightness) 조정하기

5. 이미지 중앙부를 잘라서 우너본 이미지 크기(218, 178)로 확대하기

import matplotlib.pyplot as plt

examples=[]

for example in celeba_train.take(5):

examples.append(example[0])

fig=plt.figure(figsize=(16, 8.5))

#1. 바운딩 박스로 자르기

ax=fig.add_subplot(2, 5, 1)

ax.set_title('Crop to a \nbounding-box ',size=15)

ax.imshow(examples[0])

ax=fig.add_subplot(2, 5, 6)

img_cropped=tf.image.crop_to_bounding_box(examples[0], 50, 20, 128, 128)

ax.imshow(img_cropped)

#2. 이미지 (수평으로)뒤집기

ax=fig.add_subplot(2, 5, 2)

ax.set_title('Flip (horizontal)', size=15)

ax.imshow(examples[1])

ax=fig.add_subplot(2, 5, 7)

img_flipped=tf.image.flip_left_right(examples[1])

ax.imshow(img_flipped)

#3. 대비 조정

ax=fig.add_subplot(2, 5, 3)

ax.set_title('Adjust constrast', size=15)

ax.imshow(examples[2])

ax=fig.add_subplot(2, 5, 8)

img_adj_contrast=tf.image.adjust_contrast(examples[2], contrast_factor=2)

ax.imshow(img_adj_contrast)

#4. 명도 조정

ax=fig.add_subplot(2, 5, 4)

ax.set_title('Adjust brightness', size=15)

ax.imshow(examples[3])

ax=fig.add_subplot(2, 5, 9)

img_adj_brightness=tf.image.adjust_brightness(examples[3], delta=0.3)

ax.imshow(img_adj_brightness)

#5. 이미지 중앙 자르기

ax=fig.add_subplot(2, 5, 5)

ax.set_title('Central crop/nand resize', size=15)

ax.imshow(examples[4])

ax=fig.add_subplot(2, 5, 10)

img_center_crop=tf.image.central_crop(examples[4], 0.7)

img_resized=tf.image.resize(img_center_crop, size=(218, 178))

ax.imshow(img_resized.numpy().astype('uint8'))

plt.show()

랜덤하게 변환을 수행

tf.random.set_seed(1)

fig=plt.figure(figsize=(14, 12))

for i, example in enumerate(celeba_train.take(3)):

image=example[0]

ax=fig.add_subplot(3, 4, i*4+1)

ax.imshow(image)

if i==0:

ax.set_title('Orig', size=15)

ax=fig.add_subplot(3, 4, i*4+2)

img_crop=tf.image.random_crop(image, size=(178, 178, 3))

ax.imshow(img_crop)

if i==0:

ax.set_title('Step 1: Random crop', size=15)

ax=fig.add_subplot(3, 4, i*4+3)

img_flip=tf.image.random_flip_left_right(img_crop)

ax.imshow(tf.cast(img_flip, tf.uint8))

if i==0:

ax.set_title('Step 2: Random flip', size=15)

ax=fig.add_subplot(3, 4, i*4+4)

img_resize=tf.image.resize(img_flip, size=(128, 128))

ax.imshow(tf.cast(img_resize, tf.uint8))

if i==0:

ax.set_title('Step 3: Resize', size=15)

plt.show()

모델 훈련중 위와 같은 데이터 증식과 전처리 과정 래퍼 함수

def preprocess(example, size=(64, 64), mode='train'):

image=example[0]

label=example[1]

if mode=='train':

image_cropped=tf.image.random_crop(image, size=(178, 178, 3))

image_resized=tf.image.resize(image_cropped, size=size)

image_flip=tf.image.random_flip_left_right(image_resized)

return image_flip/255.0, tf.cast(label, tf.int32)

else:

imge_cropped=tf.image.crop_to_bounding_box(image, offset_height=20, offset_width=20, target_height=178, target_width=178)

image_resized=tf.image.resize(image_cropped, size=size)

return image_resized/255.0, tf.cast(label, tf.int32)

test preprocess

tf.random.set_seed(1)

ds=celeba_train.shuffle(1000, reshuffle_each_iteration=False)

ds=ds.take(2).repeat(5)

ds=ds.map(lambda x:preprocess(x, size=(178, 178), mode='train'))

fig=plt.figure(figsize=(15, 6))

for j, example in enumerate(ds):

ax=fig.add_subplot(2, 5, j//2+(j%2)*5+1

ax.set_xticks([]);ax.set_yticks([])

ax.imshow(example[0])

#self making

#for j, example in enumerate(ds):

#ax=fig.add_subplot(2, 5, j//2+(j%2)*5+1)

#ax.set_xticks([]);ax.set_yticks([])

#example=preprocess(example, size=(178, 178), mode='train')

#ax.imshow(example[0])

plt.show()

기존 코드 lambda를 이용할 때 오류 발생

TypeError: <lambda>() takes 1 positional argument but 2 were given

아래 코드에서도 위와 같이 for을 이용해서 preprocess 해주겠음

train, valid 데이터셋 변환

GIOVANNI에서 실행

import numpy as np

BATCH_SIZE=32

BUFFER_SIZE=1000

IMAGE_SIZE=(64, 64)

steps_per_epochs=np.ceil(TRAIN_NUM/BATCH_SIZE)

ds_train=celeba_train.map(lambda x: preprocess(x, size=IMAGE_SIZE, mode='train'))

ds_train=ds.train.shuffle(buffer_size=BUFFER_SIZE).repeat()

ds+train=ds_train.batch(BATCH_SIZE)

ds_valid=celba_valid.map(lambda x: preprocess(x, size=IMAGE_SIZE, mode='eval'))

ds_valid=ds_valid.batch(BATCH_SIZE)

tf.datasets.map 작동 오류로 직접만든 데이터셋 이용 Celeb-A dataset(self making) 참조

Celeb-A dataset preprocess(CNN)